import bs4
from langchain_community.document_loaders import WebBaseLoader


def load_data(
    url: str = "https://blogs.nvidia.com/blog/what-is-retrieval-augmented-generation/",
    css_classes=("entry-content", "entry-header", "entry-title"),
):
    """Load a web page as LangChain documents, keeping only selected elements.

    Called with no arguments, this loads the NVIDIA blog post on
    retrieval-augmented generation and keeps only the elements whose CSS
    class is one of ``entry-content``, ``entry-header`` or ``entry-title``.

    Args:
        url: Address of the page to load.
        css_classes: CSS class name(s) handed to ``bs4.SoupStrainer`` as its
            ``class_`` filter; only matching elements are parsed.

    Returns:
        Whatever ``WebBaseLoader.load()`` returns for the page (a list of
        LangChain ``Document`` objects per the LangChain documentation).
    """
    # Restrict parsing to the article markup so that navigation, sidebars and
    # other page chrome do not end up in the loaded text.
    article_only = bs4.SoupStrainer(class_=css_classes)

    loader = WebBaseLoader(
        web_paths=(url,),
        bs_kwargs={"parse_only": article_only},
    )
    return loader.load()